* ----- Session setup -----
* Start from an empty session, raise the variable limit (several thousand
* helper variables are created below), and close any log left open by a
* previous run before a new one is opened.
clear all
set maxvar 30000
version 14
capture log close
set more off

****************************************************************************************************
* -----   Customize the paths and options:   ----- 
****************************************************************************************************
* Directory globals carry NO trailing slash; every use site adds the "/" itself.
*cd  "Data\"
global MY_IN_PATH   "/Users/ben/Dropbox/RnD_tax_credit/Data"
global MY_OUT_PATH  "/Users/ben/Dropbox/RnD_tax_credit/Data"
*global MY_TEMP_PATH "..."

* Fix: the separator between directory and file name was missing, which produced
* ".../RnD_tax_credit/Dataout.dta" and ".../RnD_tax_credit/Datacr_tp.log".
global MY_OUT_FILE  "${MY_OUT_PATH}/out.dta"
global MY_LOG_FILE  "${MY_OUT_PATH}/cr_tp.log"

log using "${MY_LOG_FILE}", text replace
****************************************************************************************************
* import data
*************************************************************************************************

* Load the patent-level data (path quoted so directories with spaces work).
use "${MY_IN_PATH}/patents.dta", clear

* create matrix for uspc level
*
* For every class id 1..481 build c#: the number of patents of the
* gvkey-ayear cell whose uspcid equals #.
* The 0/1 indicator is a temporary variable that is dropped as soon as it has
* been summed, so only one helper column exists at a time on the patent-level
* data (instead of 481 indicators plus 481 sums).
tempvar in_class
forvalues k = 1/481 {
	* 1 when uspcid equals `k', 0 otherwise (a missing uspcid gives 0)
	gen byte `in_class' = (uspcid == `k')
	gegen c`k' = sum(`in_class'), by(gvkey ayear)
	drop `in_class'
	display `k'
}

* Total number of patent rows in each gvkey-ayear cell
gegen no_pats = count(1), by(gvkey ayear)

keep patent uspcid ayear appl_dt gvkey c1-c481 m_kpss no_pats

* Reduce to one row per gvkey-ayear. The c# and no_pats values are constant
* within a cell; the remaining patent-level fields (patent, uspcid, appl_dt,
* m_kpss) are whatever the retained row happens to carry.
bysort gvkey ayear: keep if _n == 1


* gen pat_stock_per class and gvkey
*
* For every class # and each look-back window, build a 0/1 flag that is 1 when
* the same gvkey has at least one patent in class # in a year that lies inside
* the window relative to the current ayear:
*     c#_cum5     years t-1 .. t-5
*     c#_cum10    years t-1 .. t-10
*     c#_cum6_10  years t-6 .. t-10
*
* Fixes relative to the previous version:
*  1. The lag 6-10 lines of the cum10 window compared gvkey with rows 1-5 back
*     while reading the class count from rows 6-10 back (copy-paste slip).
*  2. The old test "ayear == ayear[_n-k] + k" only matched when the row k
*     positions back was exactly k years back, so any firm with a year missing
*     from the panel lost earlier years that are still inside the window.
*     The year distance is now tested directly. Within a gvkey the rows are
*     sorted by ayear with one row per year, so a row j positions back is at
*     least j years back; scanning the `last' preceding rows therefore covers
*     the whole window. For panels without gaps the result is unchanged.
*
* A subscript that points before the first row of the gvkey returns missing,
* which makes inrange() false, so no separate gvkey comparison is needed.
sort gvkey ayear

local win_names  5 10 6_10
local win_first  1 1  6
local win_last   5 10 10

forvalues w = 1/3 {
	local sfx   : word `w' of `win_names'
	local first : word `w' of `win_first'
	local last  : word `w' of `win_last'

	* All flags of one window are created consecutively (c1_cum.. to c481_cum..)
	* so that varlist ranges such as c1_cum5-c481_cum5 keep working.
	forvalues k = 1/481 {
		gen c`k'_cum`sfx' = 0
		forvalues j = 1/`last' {
			quietly by gvkey (ayear): replace c`k'_cum`sfx' = c`k'_cum`sfx' + c`k'[_n-`j'] ///
				if inrange(ayear - ayear[_n-`j'], `first', `last')
		}
		* Collapse the accumulated count to an indicator
		quietly replace c`k'_cum`sfx' = 1 if c`k'_cum`sfx' > 0 & !missing(c`k'_cum`sfx')
		display "window `sfx', class `k'"
	}
}


* Number of classes flagged in each look-back window, per gvkey-ayear row.
* rowtotal() treats missing flags as 0. The result is left missing for rows
* whose ayear is outside 1913-2014, as before.
* One pass per window replaces the former year-by-year loop (102 passes per
* window); this assumes ayear holds whole years -- TODO confirm.
gegen no_cl5    = rowtotal(c1_cum5-c481_cum5)       if inrange(ayear, 1913, 2014)
gegen no_cl10   = rowtotal(c1_cum10-c481_cum10)     if inrange(ayear, 1913, 2014)
gegen no_cl6_10 = rowtotal(c1_cum6_10-c481_cum6_10) if inrange(ayear, 1913, 2014)


cor no_pats no_cl5 no_cl10 no_cl6_10

* Write the class counts as a gvkey-year file. preserve/restore keeps the full
* working data in memory for interactive use after the do-file stops.
preserve
destring gvkey, replace
keep gvkey ayear no_cl5 no_cl10 no_cl6_10
rename ayear year
save "${MY_IN_PATH}/no_classes.dta", replace
restore

* Close the log before stopping; capture keeps this harmless if no log is open.
capture log close

* The do-file ends here: nothing below this line is executed.
exit


* NOTE(review): everything from here to the end of the file is unreachable,
* because the do-file stops at the unconditional `exit' above.
* The code also refers to variables that are not created anywhere in the
* visible part of this script (new1-new481, new1_sum-new481_sum, no_new_cl5,
* tp5, tp5_2020), so it would presumably stop with a "variable not found"
* error if the `exit' were removed -- confirm where these were meant to come
* from before reviving this section.

* Per-row total of new1_sum..new481_sum, filled in one ayear at a time
gen no_patnew_cl5 = .
foreach num of numlist 1913/2014{
dis `num'
gegen no_new_cl`num' = rowtotal(new1_sum-new481_sum) if ayear ==`num'
replace no_patnew_cl5 = no_new_cl`num' if ayear ==`num'
drop no_new_cl`num'
}

sum no_new_cl5 no_patnew_cl5
save ${MY_IN_PATH}/techprox_temp4.dta, replace

drop new1-new481 new1_sum-new481_sum


**********************************************************************************
****** Technological Proximity
* Jaffe's technological proximity measure
* T_i_j = (Fi*Fj') / ( ((Fi*Fi')^0.5) * ((Fj*Fj')^0.5) )
* ==> T_i_it-1 = (Fi*Fit-1') / ( ((Fi*Fi')^0.5) * ((Fit-1*Fit-1')^0.5) )

*1. gen patent stock in t-1 to t-5
* NOTE(review): c#_cum5 has been recoded to 0/1 earlier in this file, so this
* row total is the number of flagged classes rather than a patent count.
gegen pat_stock5 = rowtotal(c1_cum5-c481_cum5) 

*2. fraction of stock patents in each class
foreach num of numlist 1/481 {
gen pat_stock_fr_`num' = c`num'_cum5/pat_stock5
}

* A zero pat_stock5 gives missing fractions; set those to 0
foreach num of numlist 1/481 {
replace pat_stock_fr_`num'  = 0 if pat_stock_fr_`num' ==.
}

*3. Count number of patents per year 
egen npat = rowtotal(c1-c481) 
sum npat

*4. gen fraction of patents in each class in t
* NOTE(review): unlike the stock fractions above, missing values (npat == 0)
* are not replaced by 0 here.
foreach num of numlist 1/481 {
gen pat_fr_`num' = c`num'/npat
}

*** Calculate techprox
* Accumulate the three inner products of the formula class by class:
* fi_fj = Fi*Fj', fi_sq = Fi*Fi', fj_sq = Fj*Fj'
gen fi_fj = 0
gen fi_sq = 0
gen fj_sq = 0
foreach num of numlist 1/481 {
dis `num'
replace fi_fj = fi_fj + (pat_fr_`num') * (pat_stock_fr_`num') 
replace fi_sq = fi_sq + (pat_fr_`num')^2
replace fj_sq = fj_sq + (pat_stock_fr_`num')^2
}

* Ratio of the cross product to the product of the two vector norms
gen T_i_j= fi_fj / sqrt(fi_sq*fj_sq)
gen tp_raw5 = T_i_j

sum npat no_new_cl5 no_patnew_cl5 tp5 tp_raw5 pat_stock5

* Rename to the output names and derive the old/new split
ren ayear year
ren npat npat2020
ren no_patnew_cl5 npat_new5_2020
ren tp_raw5 tp_raw5_2020 
gen npat_old5_2020 = npat2020 - npat_new5_2020
gen fr_npat_new5_2020 = npat_new5_2020/npat2020 
ren no_new_cl5 no_new_cl5_2020

sum npat2020 npat_new5_2020 npat_old5_2020 fr_npat_new5_2020 tp_raw5_2020 no_new_cl5_2020

* Keep the gvkey-year measures and save them
destring gvkey, replace
keep gvkey year npat2020 npat_new5_2020 npat_old5_2020 fr_npat_new5_2020 no_new_cl5_2020 tp5_2020 tp_raw5_2020 
save ${MY_IN_PATH}/techprox_newptas20200826_pdate.dta, replace
